
import pandas as pd
import base.datadealing as preprocess
#original_data, original_X, original_Y,combined_training_data,x_train1,x_train2,x_train,x_test,x_val,y_train1,y_train2,y_train,y_test,y_val = preprocess.my_sdp_preprocessor('pc2.csv')
#all_data = original_data, original_X, original_Y,combined_training_data,x_train1,x_train2,x_train,x_test,x_val,y_train1,y_train2,y_train,y_test,y_val
# def NN(original_data, original_X, original_Y,combined_training_data,x_train,x_test,x_val,y_train,y_test,y_val):
#     # 导入Keras库和包
# #     import keras
# #     from keras.models import Sequential
# #     from keras.layers import Dense
# #
# #     # 神经网络初始化
# #     classifier = Sequential()
# #
# #     # 输出层数:15，激活函数为relu函数（修正线性单元），加入非线性因素，输入维度为数据集列数
# #     classifier.add(Dense(output_dim = 15, init = 'uniform', activation = 'relu', input_dim = len(original_X.columns)))
# #     # 加入之后几层的神经网络
# #     classifier.add(Dense(output_dim = 8, init = 'uniform', activation = 'relu'))
# #     classifier.add(Dense(output_dim = 5, init = 'uniform', activation = 'relu'))
# #     # 加入输出层
# #     classifier.add(Dense(output_dim = 1, init = 'uniform', activation = 'sigmoid'))
# #     # Compiling the ANN
# #     # 优化器：Adam,损失函数为二值交叉熵。
# #     classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
# #     # 使用ANN对训练集进行拟合
# #     classifier.fit(x_train, y_train, batch_size = 10, nb_epoch = 100)
# #
# #     # 使用验证集，用模型进行预测
# #     y_pred = classifier.predict(x_val)
# #     y_pred = (y_pred > 0.5)
# #     y_pred = pd.DataFrame(y_pred, columns=['Defective'])
# #     # Making the Confusion Matrix
# #     # 计算混淆矩阵
# #     from sklearn.metrics import confusion_matrix
# #     cm = confusion_matrix(y_val, y_pred)
# #     # 计算正确率
# #     from sklearn.metrics import accuracy_score
# #     accuracy_score(y_val, y_pred)
# #     return classifier

# 随机森林
#x_val,y_val
def random_forest(original_data, original_X, original_Y, combined_training_data,
                  x_train, x_test, y_train, y_test,
                  *, n_estimators=100, max_depth=5, random_state=0):
    """Fit a random-forest classifier on the training split and return it.

    Only ``x_train`` and ``y_train`` are used by this function. The other
    positional parameters are accepted but ignored, which keeps the call
    signature identical to the other model builders in this module.

    Args:
        original_data: Unused.
        original_X: Unused.
        original_Y: Unused.
        combined_training_data: Unused.
        x_train: Training features passed to ``fit``.
        x_test: Unused.
        y_train: Training labels passed to ``fit``.
        y_test: Unused.
        n_estimators: Number of decision trees in the forest.
        max_depth: Maximum depth of each tree.
        random_state: Seed for the random number generator, so that
            repeated runs build the same forest.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    # Imported locally, as in the rest of this module, so scikit-learn is
    # only required when this builder is actually called.
    from sklearn.ensemble import RandomForestClassifier

    clf = RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        random_state=random_state,
    )
    # Fit on the training split only.
    clf.fit(x_train, y_train)
    return clf

# 支持向量机
def svm(original_data, original_X, original_Y,combined_training_data,x_train,x_test,y_train,y_test):
    """Fit a support vector classifier on the training split and return it.

    Only ``x_train`` and ``y_train`` are used; every other parameter is
    accepted but ignored.

    Returns:
        The fitted ``SVC`` instance.
    """
    from sklearn.svm import SVC

    # The kernel is left at scikit-learn's default. Per the scikit-learn
    # documentation, gamma='auto' sets the kernel coefficient to
    # 1 / n_features.
    # ``fit`` returns the estimator itself, so the fitted model can be
    # returned directly.
    return SVC(gamma='auto').fit(x_train, y_train)

def nb(original_data, original_X, original_Y,combined_training_data,x_train,x_test,y_train,y_test):
    """Fit a multinomial naive Bayes classifier on the training split.

    Only ``x_train`` and ``y_train`` are used; every other parameter is
    accepted but ignored.

    Returns:
        The fitted ``MultinomialNB`` estimator.
    """
    from sklearn.naive_bayes import MultinomialNB

    # NOTE(review): MultinomialNB expects non-negative feature values --
    # confirm the preprocessing upstream guarantees this.
    # The local is called ``estimator`` so it does not shadow this function.
    estimator = MultinomialNB()
    estimator.fit(x_train, y_train)
    return estimator

# 卷积NN
# def cnn(original_data, original_X, original_Y,combined_training_data,x_train,x_test,x_val   ,y_train,y_test,y_val):
#     from keras.models import Sequential
#     from keras.layers import Dense,Dropout,Conv2D,Conv1D,Flatten,MaxPool2D
#     #create model
#
#     x_train_matrix = x_train.values
#     x_val_matrix = x_val.values
#     y_train_matrix = y_train.values
#     y_val_matrix = y_val.values
#
#     # 设置图片像素
#     img_rows, img_cols = 1,len(original_X.columns)
#     # 将训练集与验证集矩阵重构，数据类型处理
#     # 四维通常加了纵向的不同样本叠加成的数据集整体：长宽两位，通道一位，数据集一位
#     x_train1 = x_train_matrix.reshape(x_train_matrix.shape[0], img_rows, img_cols, 1)
#     x_val1 = x_val_matrix.reshape(x_val_matrix.shape[0], img_rows, img_cols, 1)
#     # 设置数据输入的图片形状
#     input_shape = (img_rows, img_cols, 1)
#     # 初始化神经网络
#     model = Sequential()
#     # 卷积层：设置输入层，输出空间的维度为64，kernel_size为2D卷积窗口的规模，input_shape设置输入图片规模，激活函数为relu函数（修正线性单元）
#     model.add(Conv2D(64, kernel_size=1, activation='relu',input_shape=input_shape))
#     model.add(Conv2D(32, kernel_size=1, activation='relu'))
#     model.add(Conv2D(16, kernel_size=1, activation='relu'))
#     # 向量降维
#     model.add(Flatten())
#     # 全连接层：继续使用Dense进行卷积
#     model.add(Dense(8, activation='relu'))
#     model.add(Dense(1, activation='sigmoid'))
#     # 使用准确性编译模型以衡量模型性能
#     model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
#     # 模型训练
#     model.fit(x_train1, y_train_matrix, epochs=40)
#     # 获取结果
#     y_pred = model.predict(x_val1)>0.5
#     y_pred_df = pd.DataFrame(y_pred)
#
#     return model

